################
# FIGURE 2
################
library(rstan)
library(stargazer)
library(data.table)
library(countrycode)
library(Amelia)
load("migration_model_data.RData")
source("create_bias_measures.R")
vars <- c("log_flow", "log_stock", "distw", "contig", "colony", "comlang_off",
"pop_d", "log_gdp_dif", "urate_d", "comcur", "lib_dem_o", "lib_dem_d",
"year_ed_15_plus_o", "iso3_o", "iso3_d", "year", "region_o",
"region_d", "gdpcap_o", "gdpcap_d", "cw_o", "dyad")
d <- bilateral_migration_data[, vars, with = FALSE]
d$log_dist_w <- log(d$distw)
d$log_pop_d <- log(d$pop_d)
d$log_urate_d <- log(d$urate_d)
d$log_gdpcap_o <- log(d$gdpcap_o)
d$log_gdpcap_d <- log(d$gdpcap_d)
d$log_gdpcap_o2 <- d$log_gdpcap_o^2
d$log_gdpcap_d2 <- d$log_gdpcap_d^2
d$year <- as.numeric(d$year)
ref_table <- d[, .(dyad, year, log_flow)]
d[, c("region_o", "region_d",
"pop_d", "urate_d", "distw", "gdpcap_o", "gdpcap_d", "dyad") := NULL]
# fit 5 imputed datasets
imp_amelia <- amelia(x = d, m = 1,
idvars = c("iso3_o", "iso3_d"), ts = "year")
year <- as.numeric(imp_amelia$imputations$imp1$year)
iso3o <- as.numeric(as.factor(imp_amelia$imputations$imp1$iso3_o))
iso3d <- as.numeric(as.factor(imp_amelia$imputations$imp1$iso3_d))
ys <- as.numeric(imp_amelia$imputations$imp1$log_flow)
X <- as.matrix(imp_amelia$imputations$imp1)
X <- X[, -c(1, 11:13)]
for(k in 1:ncol(X)){
X[, k] <- arm::rescale(X[, k], binary.inputs = "0/1")
}
X <- cbind(1, X)
X <- apply(X, 2, as.numeric)
dl1 <- list(
K = ncol(X),
N = nrow(X),
y_obs = ys,
X = X,
N_d = max(iso3d),
N_o = max(iso3o),
i_d = iso3d,
i_o = iso3o,
N_year = max(year),
i_year = year
)
rm(imp_amelia)
rm(d)
rm(bilateral_migration_data)
# for(i in 1:5)
# {
#   # tmp1 <- eval(parse(text = paste0("imp_amelia$imputations$imp", i, "$year")))
#   year[[i]] <- as.numeric(factor(eval(parse(text =
#     paste0("imp_amelia$imputations$imp", i, "$year")))))
#   iso3d[[i]] <- as.numeric(factor(eval(parse(text =
#     paste0("imp_amelia$imputations$imp", i, "$iso3_d")))))
#   iso3o[[i]] <- as.numeric(factor(eval(parse(text =
#     paste0("imp_amelia$imputations$imp", i, "$iso3_o")))))
#   ys[[i]] <- eval(parse(text = paste0("imp_amelia$imputations$imp",
#     i, "$log_flow")))
#   X <- as.matrix(eval(parse(text = paste0("imp_amelia$imputations$imp", i))))
#   X <- X[, -c(1, 11:13)]
#   for(k in 1:ncol(X)){
#     X[, k] <- arm::rescale(X[, k], binary.inputs = "0/1")
#   }
#   X <- cbind(1, X)
#   xs[[i]] <- apply(X, 2, as.numeric)
# }
#
# dl1 <- list(
#   K = ncol(xs[[1]]),
#   N = nrow(xs[[1]]),
#   y_obs = ys[[1]],
#   X = xs[[1]],
#   N_d = max(iso3d[[1]]),
#   N_o = max(iso3o[[1]]),
#   i_d = iso3d[[1]],
#   i_o = iso3o[[1]],
#   N_year = max(year[[1]]),
#   i_year = year[[1]]
# )
sm <- stan_model("weibull_hurdle_6.stan")
sf <- vb(sm, data = dl1, output_samples = 1000)
ex <- extract(sf)
resids <- ex$resids
rm(sm, sf, ex)
rm(x)
rm(X)
rm(dl1)
bias <- apply(resids, 2, median)
ref_table$bias <- bias
ref_table$flow <- ref_table$log_flow
bias_dat <- ref_table
bias_dat <- create_bias_measures(bias_dat)
bias_dat <- bias_dat[, .(bias = mean(bias)), by = c("origin_name", "year")]
oecd <- c("Australia", "Austria", "Belgium", "Canada", "Denmark", "Finland",
"France", "Germany", "Greece", "Iceland", "Ireland", "Italy",
"Japan", "Mexico", "Netherlands", "New Zealand", "Norway", "Portugal",
"Spain", "Sweden", "Switzerland", "United Kingdom", "United States"
)
skin <- read.csv("skincolor.csv")
bias_dat <- merge(bias_dat, skin, by.x = "origin_name", by.y = "country")
bias_dat[, oecd := ifelse(origin_name %in% oecd, 1, 0)]
bias_dat[, north := ifelse(medical %in% c("Chinese", "Japanese", "White"), 1, 0)]
bias_dat[, black := ifelse(!(medical %in% c("Black")), 0, 1)]
oecd <- data.table::data.table(bias = bias_dat[oecd == 1, mean(bias), by = year],
cat = "OECD")
black <- data.table::data.table(bias = bias_dat[black == 1, mean(bias), by = year],
cat = "Black")
south <- data.table::data.table(bias = bias_dat[north == 0, mean(bias), by = year],
cat = "Global South")
ggdat <- rbind(oecd, black, south)
ggdat
setnames(ggdat, c("year", "bias", "race"))
ggdat$year <- factor(as.numeric(ggdat$year),
labels = c("1991-1995", "1996-2000", "2001-2005", "2006-2010"))
ggplot(ggdat, aes(x = year, y = bias, group = race)) +
geom_line(aes(linetype = race)) +
theme_minimal(base_size = 14) +
ylab("Average Deviation") +
xlab("Time Period") +
theme(legend.title = element_blank(),
text = element_text(family = "Roboto Condensed")) +
ggtitle("Comparing Deviation Densities")
library(rstan)
library(stargazer)
library(data.table)
library(countrycode)
library(Amelia)
load("migration_model_data.RData")
source("create_bias_measures.R")
set.seed(1)
vars <- c("log_flow", "log_stock", "distw", "contig", "colony", "comlang_off",
"pop_d", "log_gdp_dif", "urate_d", "comcur", "lib_dem_o", "lib_dem_d",
"year_ed_15_plus_o", "iso3_o", "iso3_d", "year", "region_o",
"region_d", "gdpcap_o", "gdpcap_d", "cw_o", "dyad")
d <- bilateral_migration_data[, vars, with = FALSE]
d$log_dist_w <- log(d$distw)
d$log_pop_d <- log(d$pop_d)
d$log_urate_d <- log(d$urate_d)
d$log_gdpcap_o <- log(d$gdpcap_o)
d$log_gdpcap_d <- log(d$gdpcap_d)
d$log_gdpcap_o2 <- d$log_gdpcap_o^2
d$log_gdpcap_d2 <- d$log_gdpcap_d^2
d$year <- as.numeric(d$year)
ref_table <- d[, .(dyad, year, log_flow)]
d[, c("region_o", "region_d",
"pop_d", "urate_d", "distw", "gdpcap_o", "gdpcap_d", "dyad") := NULL]
# fit 5 imputed datasets
imp_amelia <- amelia(x = d, m = 1,
idvars = c("iso3_o", "iso3_d"), ts = "year")
year <- as.numeric(imp_amelia$imputations$imp1$year)
iso3o <- as.numeric(as.factor(imp_amelia$imputations$imp1$iso3_o))
iso3d <- as.numeric(as.factor(imp_amelia$imputations$imp1$iso3_d))
ys <- as.numeric(imp_amelia$imputations$imp1$log_flow)
X <- as.matrix(imp_amelia$imputations$imp1)
X <- X[, -c(1, 11:13)]
for(k in 1:ncol(X)){
X[, k] <- arm::rescale(X[, k], binary.inputs = "0/1")
}
X <- cbind(1, X)
X <- apply(X, 2, as.numeric)
dl1 <- list(
K = ncol(X),
N = nrow(X),
y_obs = ys,
X = X,
N_d = max(iso3d),
N_o = max(iso3o),
i_d = iso3d,
i_o = iso3o,
N_year = max(year),
i_year = year
)
rm(imp_amelia)
rm(d)
rm(bilateral_migration_data)
# for(i in 1:5)
# {
#   # tmp1 <- eval(parse(text = paste0("imp_amelia$imputations$imp", i, "$year")))
#   year[[i]] <- as.numeric(factor(eval(parse(text =
#     paste0("imp_amelia$imputations$imp", i, "$year")))))
#   iso3d[[i]] <- as.numeric(factor(eval(parse(text =
#     paste0("imp_amelia$imputations$imp", i, "$iso3_d")))))
#   iso3o[[i]] <- as.numeric(factor(eval(parse(text =
#     paste0("imp_amelia$imputations$imp", i, "$iso3_o")))))
#   ys[[i]] <- eval(parse(text = paste0("imp_amelia$imputations$imp",
#     i, "$log_flow")))
#   X <- as.matrix(eval(parse(text = paste0("imp_amelia$imputations$imp", i))))
#   X <- X[, -c(1, 11:13)]
#   for(k in 1:ncol(X)){
#     X[, k] <- arm::rescale(X[, k], binary.inputs = "0/1")
#   }
#   X <- cbind(1, X)
#   xs[[i]] <- apply(X, 2, as.numeric)
# }
#
# dl1 <- list(
#   K = ncol(xs[[1]]),
#   N = nrow(xs[[1]]),
#   y_obs = ys[[1]],
#   X = xs[[1]],
#   N_d = max(iso3d[[1]]),
#   N_o = max(iso3o[[1]]),
#   i_d = iso3d[[1]],
#   i_o = iso3o[[1]],
#   N_year = max(year[[1]]),
#   i_year = year[[1]]
# )
sm <- stan_model("weibull_hurdle_6.stan")
sf <- vb(sm, data = dl1, output_samples = 1000)
ex <- extract(sf)
resids <- ex$resids
rm(sm, sf, ex, X, dl1)
bias <- apply(resids, 2, median)
ref_table$bias <- bias
ref_table$flow <- ref_table$log_flow
bias_dat <- ref_table
bias_dat <- create_bias_measures(bias_dat)
bias_dat <- bias_dat[, .(bias = mean(bias)), by = c("origin_name", "year")]
oecd <- c("Australia", "Austria", "Belgium", "Canada", "Denmark", "Finland",
"France", "Germany", "Greece", "Iceland", "Ireland", "Italy",
"Japan", "Mexico", "Netherlands", "New Zealand", "Norway", "Portugal",
"Spain", "Sweden", "Switzerland", "United Kingdom", "United States"
)
skin <- read.csv("skincolor.csv")
bias_dat <- merge(bias_dat, skin, by.x = "origin_name", by.y = "country")
bias_dat[, oecd := ifelse(origin_name %in% oecd, 1, 0)]
bias_dat[, north := ifelse(medical %in% c("Chinese", "Japanese", "White"), 1, 0)]
bias_dat[, black := ifelse(!(medical %in% c("Black")), 0, 1)]
oecd <- data.table::data.table(bias = bias_dat[oecd == 1, mean(bias), by = year],
cat = "OECD")
black <- data.table::data.table(bias = bias_dat[black == 1, mean(bias), by = year],
cat = "Black")
south <- data.table::data.table(bias = bias_dat[north == 0, mean(bias), by = year],
cat = "Global South")
ggdat <- rbind(oecd, black, south)
setnames(ggdat, c("year", "bias", "race"))
ggdat$year <- factor(as.numeric(ggdat$year),
labels = c("1991-1995", "1996-2000", "2001-2005", "2006-2010"))
#tiff("black_bias_over_time_am.tiff", width = 6, height = 6, units = 'in', res = 600)
ggplot(ggdat, aes(x = year, y = bias, group = race)) +
geom_line(aes(linetype = race)) +
theme_minimal(base_size = 14) +
ylab("Average Deviation") +
xlab("Time Period") +
theme(legend.title = element_blank(),
text = element_text(family = "Roboto Condensed")) +
ggtitle("Comparing Deviation Densities")
#dev.off()
library(rstan)
library(stargazer)
library(data.table)
library(countrycode)
library(Amelia)
load("migration_model_data.RData")
source("create_bias_measures.R")
set.seed(1)
vars <- c("log_flow", "log_stock", "distw", "contig", "colony", "comlang_off",
"pop_d", "log_gdp_dif", "urate_d", "comcur", "lib_dem_o", "lib_dem_d",
"year_ed_15_plus_o", "iso3_o", "iso3_d", "year", "region_o",
"region_d", "gdpcap_o", "gdpcap_d", "cw_o", "dyad")
d <- bilateral_migration_data[, vars, with = FALSE]
d$log_dist_w <- log(d$distw)
d$log_pop_d <- log(d$pop_d)
d$log_urate_d <- log(d$urate_d)
d$log_gdpcap_o <- log(d$gdpcap_o)
d$log_gdpcap_d <- log(d$gdpcap_d)
d$log_gdpcap_o2 <- d$log_gdpcap_o^2
d$log_gdpcap_d2 <- d$log_gdpcap_d^2
d$year <- as.numeric(d$year)
ref_table <- d[, .(dyad, year, log_flow)]
d[, c("region_o", "region_d",
"pop_d", "urate_d", "distw", "gdpcap_o", "gdpcap_d", "dyad") := NULL]
# fit 5 imputed datasets
imp_amelia <- amelia(x = d, m = 1,
idvars = c("iso3_o", "iso3_d"), ts = "year")
year <- as.numeric(imp_amelia$imputations$imp1$year)
iso3o <- as.numeric(as.factor(imp_amelia$imputations$imp1$iso3_o))
iso3d <- as.numeric(as.factor(imp_amelia$imputations$imp1$iso3_d))
ys <- as.numeric(imp_amelia$imputations$imp1$log_flow)
X <- as.matrix(imp_amelia$imputations$imp1)
X <- X[, -c(1, 11:13)]
for(k in 1:ncol(X)){
X[, k] <- arm::rescale(X[, k], binary.inputs = "0/1")
}
X <- cbind(1, X)
X <- apply(X, 2, as.numeric)
dl1 <- list(
K = ncol(X),
N = nrow(X),
y_obs = ys,
X = X,
N_d = max(iso3d),
N_o = max(iso3o),
i_d = iso3d,
i_o = iso3o,
N_year = max(year),
i_year = year
)
rm(imp_amelia)
rm(d)
rm(bilateral_migration_data)
# for(i in 1:5)
# {
#   # tmp1 <- eval(parse(text = paste0("imp_amelia$imputations$imp", i, "$year")))
#   year[[i]] <- as.numeric(factor(eval(parse(text =
#     paste0("imp_amelia$imputations$imp", i, "$year")))))
#   iso3d[[i]] <- as.numeric(factor(eval(parse(text =
#     paste0("imp_amelia$imputations$imp", i, "$iso3_d")))))
#   iso3o[[i]] <- as.numeric(factor(eval(parse(text =
#     paste0("imp_amelia$imputations$imp", i, "$iso3_o")))))
#   ys[[i]] <- eval(parse(text = paste0("imp_amelia$imputations$imp",
#     i, "$log_flow")))
#   X <- as.matrix(eval(parse(text = paste0("imp_amelia$imputations$imp", i))))
#   X <- X[, -c(1, 11:13)]
#   for(k in 1:ncol(X)){
#     X[, k] <- arm::rescale(X[, k], binary.inputs = "0/1")
#   }
#   X <- cbind(1, X)
#   xs[[i]] <- apply(X, 2, as.numeric)
# }
#
# dl1 <- list(
#   K = ncol(xs[[1]]),
#   N = nrow(xs[[1]]),
#   y_obs = ys[[1]],
#   X = xs[[1]],
#   N_d = max(iso3d[[1]]),
#   N_o = max(iso3o[[1]]),
#   i_d = iso3d[[1]],
#   i_o = iso3o[[1]],
#   N_year = max(year[[1]]),
#   i_year = year[[1]]
# )
sm <- stan_model("weibull_hurdle_6.stan")
sf <- vb(sm, data = dl1, output_samples = 1000)
ex <- extract(sf)
resids <- ex$resids
rm(sm, sf, ex, X, dl1)
bias <- apply(resids, 2, median)
ref_table$bias <- bias
ref_table$flow <- ref_table$log_flow
bias_dat <- ref_table
bias_dat <- create_bias_measures(bias_dat)
bias_dat <- bias_dat[, .(bias = mean(bias)), by = c("origin_name", "year")]
oecd <- c("Australia", "Austria", "Belgium", "Canada", "Denmark", "Finland",
"France", "Germany", "Greece", "Iceland", "Ireland", "Italy",
"Japan", "Mexico", "Netherlands", "New Zealand", "Norway", "Portugal",
"Spain", "Sweden", "Switzerland", "United Kingdom", "United States"
)
skin <- read.csv("skincolor.csv")
bias_dat <- merge(bias_dat, skin, by.x = "origin_name", by.y = "country")
bias_dat[, oecd := ifelse(origin_name %in% oecd, 1, 0)]
bias_dat[, north := ifelse(medical %in% c("Chinese", "Japanese", "White"), 1, 0)]
bias_dat[, black := ifelse(!(medical %in% c("Black")), 0, 1)]
oecd <- data.table::data.table(bias = bias_dat[oecd == 1, mean(bias), by = year],
cat = "OECD")
black <- data.table::data.table(bias = bias_dat[black == 1, mean(bias), by = year],
cat = "Black")
south <- data.table::data.table(bias = bias_dat[north == 0, mean(bias), by = year],
cat = "Global South")
ggdat <- rbind(oecd, black, south)
setnames(ggdat, c("year", "bias", "race"))
ggdat$year <- factor(as.numeric(ggdat$year),
labels = c("1991-1995", "1996-2000", "2001-2005", "2006-2010"))
#tiff("black_bias_over_time_am.tiff", width = 6, height = 6, units = 'in', res = 600)
ggplot(ggdat, aes(x = year, y = bias, group = race)) +
geom_line(aes(linetype = race)) +
theme_minimal(base_size = 14) +
ylab("Average Deviation") +
xlab("Time Period") +
theme(legend.title = element_blank(),
text = element_text(family = "Roboto Condensed")) +
ggtitle("Comparing Deviation Densities")
#dev.off()
################
# FIGURE 2
################
library(rstan)
